Use the milk_production.csv data to create the following
charts showing differences from the mean state milk production in
2017.
# 2017 rows of milk_production, with milk_produced divided by 1e9 (the plots
# below label this as billions of lbs) and `state` reordered by that value so
# the charts list states in order of production.
milk_summary <- milk_production %>%
  filter(year == 2017) %>%
  mutate(milk_produced = milk_produced / 1e9) %>%
  mutate(state = fct_reorder(state, milk_produced))
# Dot plot of 2017 milk production by state with a dashed red line at the
# mean. The points are drawn from `milk_summary` (the same data frame the
# mean line is computed from) rather than from a second copy of the
# filter/mutate pipeline, so the two can never drift apart.
ggplot(milk_summary) +
  geom_point(
    aes(x = milk_produced, y = state),
    size = 2.5, color = 'steelblue') +
  # Reference line at the mean of the plotted values
  geom_vline(
    xintercept = mean(milk_summary$milk_produced),
    color = 'red', linetype = 'dashed') +
  # Text label for the mean line; x / y are hand-picked positions
  annotate(
    'text', x = 5, y = 'Georgia',
    color = 'red', hjust = 0,
    label = 'Mean\nProduction') +
  theme_minimal_vgrid() +
  labs(x = 'Milk produced (billions lbs)',
       y = 'State')
# Diverging bar chart: each state's production minus the mean across the rows
# of milk_summary. Bars are filled by whether the difference is positive
# ("above") or not ("below"); the legend is hidden.
milk_summary %>%
  mutate(milk_produced = milk_produced - mean(milk_produced)) %>%
  mutate(bar_color = ifelse(milk_produced > 0, "above", "below")) %>%
  ggplot(aes(x = milk_produced, y = state, fill = bar_color)) +
  geom_col(width = 0.7) +
  # Unnamed values are matched to the fill levels in order: above, below
  scale_fill_manual(values = c("steelblue", "sienna")) +
  theme_minimal_vgrid() +
  theme(legend.position = "none") +
  labs(
    x = "Difference from mean milk produced (billions lbs)",
    y = "State"
  )
Now replicate it, but only for the top 10 states, merging all other states into an “Other” category.
# The ten 2017 rows with the largest milk_produced, sorted from largest down.
top10 <- filter(milk_production, year == 2017) %>%
  arrange(desc(milk_produced)) %>%
  slice(seq_len(10))
# Rebuild milk_summary for 2017 keeping only the states listed in `top10`;
# every other state is collapsed into a single "Other" level by fct_other()
# and its production is summed. The result has one row per kept state plus
# "Other", with `state` reordered by the summed production.
milk_summary <- milk_production %>%
  filter(year == 2017) %>%
  mutate(milk_produced = milk_produced / 1e9) %>%
  mutate(state = fct_other(state, keep = top10$state)) %>%
  group_by(state) %>%
  summarise(milk_produced = sum(milk_produced)) %>%
  ungroup() %>%
  mutate(state = fct_reorder(state, milk_produced))
# Dot plot of the top-10-plus-"Other" summary, with a dashed red line at the
# mean of the plotted values (the "Other" row is part of that mean).
ggplot(milk_summary, aes(x = milk_produced, y = state)) +
  geom_point(size = 2.5, color = "steelblue") +
  geom_vline(
    xintercept = mean(milk_summary$milk_produced),
    color = "red", linetype = "dashed"
  ) +
  # Right-aligned text label for the mean line at a hand-picked position
  annotate(
    "text",
    x = 19, y = "California",
    color = "red", hjust = 1,
    label = "Mean\nProduction"
  ) +
  theme_minimal_vgrid() +
  labs(
    x = "Milk produced (billions lbs)",
    y = "State"
  )
Using the internet_regions data frame, pick a strategy
and create an improved version of this chart.
Strategies:
# Keep only the 2000 and 2015 rows of internet_regions, rescale numUsers by
# 1e6 (plotted below as millions of users) and make `year` a factor so it
# behaves as a discrete variable in the charts.
internet_regions_compare <- internet_regions %>%
  filter(year %in% c(2000, 2015)) %>%
  mutate(numUsers = numUsers / 1e6) %>%
  mutate(year = as.factor(year))
# Baseline chart: bars dodged by region within each year.
ggplot(
  internet_regions_compare,
  aes(x = year, y = numUsers, fill = region)
) +
  geom_col(position = "dodge") +
  labs(y = "Millions of internet users")
# Horizontal dodged bars: one pair of bars (2000 vs 2015) per region, with
# regions reordered via fct_reorder2() on year and negated numUsers.
internet_regions_compare %>%
  mutate(region = fct_reorder2(region, year, -numUsers)) %>%
  ggplot(aes(x = numUsers, y = region, fill = year)) +
  geom_col(position = "dodge") +
  theme_minimal_vgrid(font_size = 18) +
  # Fixed 0-1200 range; no padding on the left so bars start at the axis
  scale_x_continuous(
    limits = c(0, 1200),
    expand = expansion(mult = c(0, 0.05))
  ) +
  scale_fill_manual(values = c("grey", "gold")) +
  labs(
    x = "Millions of internet users",
    y = "Region"
  )
# Faceted bars: one panel per region (single row), 2000 vs 2015 within each
# panel. Long region names are broken onto two lines so the strip labels
# fit. The break is a bare "\n" with no trailing space, so the second line of
# every label is aligned the same way.
internet_regions_compare %>%
  mutate(
    region = fct_recode(region,
      "Middle East &\nNorth Africa" = "Middle East & North Africa",
      "Latin America &\nCaribbean" = "Latin America & Caribbean",
      "Europe &\nCentral Asia" = "Europe & Central Asia"),
    # Reorder after recoding so the panel order uses the final level names
    region = fct_reorder2(region, year, -numUsers)) %>%
  ggplot() +
  geom_col(
    aes(x = year, y = numUsers, fill = year),
    width = 0.6) +
  facet_wrap(vars(region), nrow = 1) +
  theme_minimal_hgrid(font_size = 18) +
  # Year is already on the x axis, so the fill legend is redundant
  theme(legend.position = "none") +
  scale_y_continuous(
    limits = c(0, 1200),
    expand = expansion(mult = c(0, 0.05))) +
  scale_fill_manual(values = c('grey', 'gold')) +
  labs(y = "Millions of internet users")
# Bullet chart: a wide grey bar for 2000 with a narrow black bar for 2015
# drawn on top, one row per region. The data is reshaped to one column per
# year so each year can be drawn as its own geom_col() layer.
internet_regions_compare %>%
  mutate(region = fct_reorder2(region, year, -numUsers)) %>%
  # pivot_wider() replaces the superseded spread(); yields `2000` and `2015`
  pivot_wider(names_from = year, values_from = numUsers) %>%
  ggplot() +
  geom_col(
    aes(x = `2000`, y = region, fill = "2000"),
    width = 0.7) +
  geom_col(
    aes(x = `2015`, y = region, fill = "2015"),
    width = 0.3) +
  theme_minimal_vgrid(font_size = 18) +
  scale_x_continuous(
    limits = c(0, 1200),
    expand = expansion(mult = c(0, 0.05))) +
  scale_fill_manual(values = c('grey', 'black')) +
  # User counts are on the x axis and regions on the y axis, so label them
  # that way (otherwise x would default to the column name "2000")
  labs(x = "Millions of internet users",
       y = "Region",
       fill = "Year")
# Dumbbell chart: for each region, a light line joins its 2000 and 2015
# values, and a point coloured by year marks each end.
internet_regions_compare %>%
  mutate(region = fct_reorder2(region, year, -numUsers)) %>%
  ggplot(aes(x = numUsers, y = region)) +
  # Connector between the two years of the same region
  geom_line(aes(group = region), color = "lightblue", size = 1) +
  geom_point(aes(color = year), size = 2.5) +
  scale_color_manual(values = c("lightblue", "steelblue")) +
  scale_x_continuous(limits = c(0, 1200)) +
  theme_minimal_vgrid() +
  # Drop the y axis line
  theme(axis.line.y = element_blank()) +
  labs(
    x = "Millions of internet users",
    y = "Region",
    color = "Year",
    title = "Number of internet users by world region",
    subtitle = "(2000 - 2015)"
  )
# Slope chart: one line per region from 2000 to 2015, labelled at both ends
# with "<region> (<rounded users>)". East Asia & Pacific is highlighted in
# red; all other regions are black.
internet_regions_compare %>%
  mutate(
    region = fct_reorder2(region, year, -numUsers),
    region_asia = ifelse(region == "East Asia & Pacific", "asia", "other"),
    # paste0() so the label reads "Region (123)"; paste() would insert its
    # default " " separator around every piece, giving "Region  ( 123 )"
    label = paste0(region, ' (', round(numUsers), ')'),
    # Each row carries a label only on the side matching its year
    label_left = ifelse(year == 2000, label, NA),
    label_right = ifelse(year == 2015, label, NA)) %>%
  ggplot(aes(x = year, y = numUsers,
             group = region)) +
  geom_line(aes(color = region_asia), size = 1) +
  # Add 2000 labels (left side)
  geom_text_repel(
    aes(label = label_left),
    hjust = 1, nudge_x = -0.05,
    direction = 'y', segment.color = 'grey') +
  # Add 2015 labels (right side)
  geom_text_repel(
    aes(label = label_right),
    hjust = 0, nudge_x = 0.05,
    direction = 'y', segment.color = 'grey') +
  # Move year labels to top
  scale_x_discrete(position = 'top') +
  # Annotate & adjust theme
  scale_color_manual(values = c("red", "black")) +
  theme_minimal_grid() +
  theme(panel.grid = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        legend.position = 'none') +
  labs(x = NULL,
       y = 'Millions of internet users',
       title = 'Number of internet users by world region',
       subtitle = "(2000 - 2015)")
Use the gapminder.csv data to create the following
charts comparing the distribution of life expectancy across countries
within each continent in 2007.
# 2007 rows of gapminder, with `continent` reordered by lifeExp (fct_reorder
# with its default summary function) so plots list continents in that order.
gapminder_2007 <- mutate(
  filter(gapminder, year == 2007),
  continent = fct_reorder(continent, lifeExp)
)
# Overlapping density curves of 2007 life expectancy, one per continent.
# The y aesthetic uses the computed `count` statistic (density scaled by the
# number of observations) instead of the default density, written with
# after_stat() rather than the deprecated `..count..` notation.
gapminder_densities <- ggplot(gapminder_2007) +
  geom_density(
    aes(x = lifeExp, y = after_stat(count), fill = continent),
    alpha = 0.4) +
  # No padding below zero so the curves sit on the x axis
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.05))) +
  scale_fill_brewer(palette = 'Accent') +
  theme_minimal_hgrid() +
  labs(
    x = 'Life expectancy (years)',
    y = 'Count',
    fill = 'Continent',
    title = 'Distribution of life expectancy across\ncountries in continent in 2007')
# Print the stored plot
gapminder_densities
# Same density plot split into one panel per continent on a single row; the
# fill legend is hidden and the title is replaced by a single-line version.
gapminder_densities +
  facet_wrap(~ continent, nrow = 1) +
  theme(legend.position = "none") +
  labs(
    title = "Distribution of life expectancy across countries in continent in 2007"
  )
# Ridgeline plot of 2007 life expectancy by continent, with Oceania rows
# filtered out before plotting.
gapminder_2007 %>%
  filter(continent != "Oceania") %>%
  ggplot(aes(x = lifeExp, y = continent)) +
  geom_density_ridges(scale = 1.5, alpha = 0.7) +
  # Remove axis padding on both scales
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  # Allow drawing outside the panel so the top ridge is not clipped
  coord_cartesian(clip = "off") +
  theme_ridges() +
  labs(
    x = "Life expectancy (years)",
    y = "Continent",
    title = "Distribution of life expectancy across\ncountries in continent in 2007"
  )